by SAHIL NAIR
# this cell contains our library imports
import pandas as pd
import altair as alt
import geopandas as geopandas
import contextily as cx
import folium as folium
from pysal.viz import mapclassify
alt.data_transformers.disable_max_rows()
DataTransformerRegistry.enable('default')
Objective: To understand the data set
rawdata = pd.read_csv('Mel_listings.csv')
rawdata
| id | name | host_id | host_name | neighbourhood_group | neighbourhood | latitude | longitude | room_type | price | minimum_nights | number_of_reviews | last_review | reviews_per_month | calculated_host_listings_count | availability_365 | number_of_reviews_ltm | license | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 9835 | Beautiful Room & House | 33057 | Manju | NaN | Manningham | -37.772470 | 145.092320 | Private room | 60 | 1 | 4 | 2015-09-12 | 0.03 | 1 | 365 | 0 | NaN |
| 1 | 12936 | St Kilda 1BR+BEACHSIDE+BALCONY+WIFI+AC | 50121 | The A2C Team | NaN | Port Phillip | -37.859990 | 144.976620 | Entire home/apt | 95 | 3 | 42 | 2016-01-25 | 0.74 | 10 | 0 | 0 | NaN |
| 2 | 38271 | Melbourne - Old Trafford Apartment | 164193 | Daryl & Dee | NaN | Casey | -38.057250 | 145.339360 | Entire home/apt | 100 | 1 | 162 | 2020-01-09 | 1.23 | 1 | 345 | 13 | NaN |
| 3 | 41836 | CLOSE TO CITY & MELBOURNE AIRPORT | 182833 | Diana | NaN | Darebin | -37.697290 | 145.000820 | Private room | 40 | 7 | 159 | 2017-08-24 | 1.72 | 2 | 0 | 0 | NaN |
| 4 | 43429 | Tranquil Javanese-Style Apartment in Oakleigh ... | 189684 | Allan | NaN | Monash | -37.899600 | 145.114470 | Entire home/apt | 99 | 2 | 214 | 2019-08-09 | 3.41 | 2 | 297 | 4 | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 18362 | 52136491 | Contemporary Spacious One BDR Apt in Southbank | 161697557 | Eachann | NaN | Melbourne | -37.826921 | 144.960394 | Entire home/apt | 80 | 180 | 0 | NaN | NaN | 64 | 357 | 0 | NaN |
| 18363 | 52136551 | Spacious One BDR Apt w/ Amazing Views | 161697557 | Eachann | NaN | Port Phillip | -37.828707 | 144.958652 | Entire home/apt | 70 | 180 | 0 | NaN | NaN | 64 | 191 | 0 | NaN |
| 18364 | 52137025 | Stylish Two BDR Apt at St Kilda Rd | 161697557 | Eachann | NaN | Melbourne | -37.846132 | 144.979593 | Entire home/apt | 90 | 180 | 0 | NaN | NaN | 64 | 223 | 0 | NaN |
| 18365 | 52137124 | Chic Two BDR Apt at St Kilda Rd | 161697557 | Eachann | NaN | Port Phillip | -37.847558 | 144.977845 | Entire home/apt | 100 | 180 | 0 | NaN | NaN | 64 | 358 | 0 | NaN |
| 18366 | 52137197 | Cozy Two BDR Apt at St Kilda Rd | 161697557 | Eachann | NaN | Melbourne | -37.846641 | 144.979528 | Entire home/apt | 88 | 180 | 0 | NaN | NaN | 64 | 358 | 0 | NaN |
18367 rows × 18 columns
rawneighbourhoods = geopandas.read_file('Melneighbourhoods.geojson')
rawneighbourhoods
| neighbourhood | neighbourhood_group | geometry | |
|---|---|---|---|
| 0 | Greater Dandenong | None | MULTIPOLYGON (((145.14170 -37.93139, 145.14255... |
| 1 | Wyndham | None | MULTIPOLYGON (((144.82749 -37.82280, 144.82654... |
| 2 | Hume | None | MULTIPOLYGON (((144.67238 -37.56779, 144.67127... |
| 3 | Hobsons Bay | None | MULTIPOLYGON (((144.90275 -37.84707, 144.90339... |
| 4 | Kingston | None | MULTIPOLYGON (((145.14170 -37.93139, 145.14190... |
| 5 | Maribyrnong | None | MULTIPOLYGON (((144.90263 -37.78966, 144.90261... |
| 6 | Monash | None | MULTIPOLYGON (((145.14170 -37.93139, 145.13654... |
| 7 | Nillumbik | None | MULTIPOLYGON (((145.06784 -37.68664, 145.06824... |
| 8 | Whittlesea | None | MULTIPOLYGON (((145.06784 -37.68664, 145.06767... |
| 9 | Yarra | None | MULTIPOLYGON (((145.04020 -37.78422, 145.04014... |
| 10 | Bayside | None | MULTIPOLYGON (((144.99705 -37.88387, 145.00104... |
| 11 | Moonee Valley | None | MULTIPOLYGON (((144.90263 -37.78966, 144.90267... |
| 12 | Casey | None | MULTIPOLYGON (((145.22919 -37.95208, 145.23002... |
| 13 | Knox | None | MULTIPOLYGON (((145.22919 -37.95208, 145.22908... |
| 14 | Glen Eira | None | MULTIPOLYGON (((144.99705 -37.88387, 144.99675... |
| 15 | Moreland | None | MULTIPOLYGON (((144.93708 -37.77779, 144.93692... |
| 16 | Whitehorse | None | MULTIPOLYGON (((145.19571 -37.86517, 145.19532... |
| 17 | Maroondah | None | MULTIPOLYGON (((145.21353 -37.81177, 145.21369... |
| 18 | Manningham | None | MULTIPOLYGON (((145.28957 -37.76282, 145.28924... |
| 19 | Melton | None | MULTIPOLYGON (((144.75088 -37.81042, 144.75078... |
| 20 | Yarra Ranges | None | MULTIPOLYGON (((145.76502 -37.89858, 145.76055... |
| 21 | Boroondara | None | MULTIPOLYGON (((145.00504 -37.80530, 145.00527... |
| 22 | Stonnington | None | MULTIPOLYGON (((145.05624 -37.85293, 145.05626... |
| 23 | Port Phillip | None | MULTIPOLYGON (((144.99705 -37.88387, 144.99226... |
| 24 | Melbourne | None | MULTIPOLYGON (((144.93708 -37.77779, 144.93734... |
| 25 | Cardinia | None | MULTIPOLYGON (((145.60569 -38.33243, 145.59361... |
| 26 | Banyule | None | MULTIPOLYGON (((145.13566 -37.74089, 145.13561... |
| 27 | Darebin | None | MULTIPOLYGON (((144.98852 -37.77641, 144.98852... |
| 28 | Brimbank | None | MULTIPOLYGON (((144.88846 -37.71082, 144.88823... |
| 29 | Frankston | None | MULTIPOLYGON (((145.12278 -38.08510, 145.12312... |
Insights.1
To identify specific patterns in order to decide how to trim the datat set as per my analysis
# To identify unique number of hosts in Melbourne
rawdata['host_id'].nunique()
11866
#Identifying max. and min. listing price, total number of listings and average listing price
rawdata['price'].agg(['min', 'max', 'mean', 'count'])
min 0.000000 max 16267.000000 mean 167.674688 count 18367.000000 Name: price, dtype: float64
rawdata['minimum_nights'].agg(['min', 'max', 'mean', 'count'])
min 1.000000 max 1125.000000 mean 6.741547 count 18367.000000 Name: minimum_nights, dtype: float64
# Identifying total number of districts, number of listings per neighbourhod, average-max-min price of a listings per neighbourhood
rawdata.groupby('neighbourhood')['price'].agg(['min', 'max', 'mean', 'count']).sort_values('count', ascending=False).reset_index()
| neighbourhood | min | max | mean | count | |
|---|---|---|---|---|---|
| 0 | Melbourne | 15 | 13000 | 152.879846 | 5468 |
| 1 | Port Phillip | 15 | 16267 | 177.390583 | 2230 |
| 2 | Yarra | 15 | 5000 | 152.569307 | 1414 |
| 3 | Stonnington | 13 | 8888 | 190.729992 | 1237 |
| 4 | Yarra Ranges | 0 | 4357 | 305.680348 | 804 |
| 5 | Moreland | 18 | 2644 | 125.846253 | 774 |
| 6 | Monash | 16 | 3600 | 140.991379 | 580 |
| 7 | Darebin | 14 | 13379 | 165.333333 | 537 |
| 8 | Boroondara | 13 | 3374 | 162.285714 | 518 |
| 9 | Glen Eira | 19 | 13000 | 161.131373 | 510 |
| 10 | Whitehorse | 19 | 9856 | 153.983471 | 484 |
| 11 | Wyndham | 16 | 522 | 120.745011 | 451 |
| 12 | Maribyrnong | 16 | 7000 | 133.379310 | 406 |
| 13 | Bayside | 25 | 6145 | 307.758514 | 323 |
| 14 | Moonee Valley | 20 | 1990 | 133.152542 | 295 |
| 15 | Kingston | 28 | 1957 | 185.351351 | 259 |
| 16 | Manningham | 16 | 9856 | 177.791837 | 245 |
| 17 | Hobsons Bay | 25 | 1000 | 163.312821 | 195 |
| 18 | Banyule | 14 | 800 | 108.357513 | 193 |
| 19 | Frankston | 27 | 1600 | 207.574850 | 167 |
| 20 | Casey | 25 | 1000 | 115.736196 | 163 |
| 21 | Knox | 25 | 9999 | 262.077419 | 155 |
| 22 | Hume | 20 | 590 | 119.589041 | 146 |
| 23 | Whittlesea | 14 | 15000 | 192.624113 | 141 |
| 24 | Cardinia | 32 | 924 | 180.657143 | 140 |
| 25 | Greater Dandenong | 14 | 5000 | 144.518797 | 133 |
| 26 | Brimbank | 25 | 750 | 99.366412 | 131 |
| 27 | Melton | 18 | 900 | 142.752577 | 97 |
| 28 | Maroondah | 25 | 9999 | 258.112360 | 89 |
| 29 | Nillumbik | 20 | 1500 | 210.341463 | 82 |
# Checking the distribution of pirces, it seems to be very concentrated less than 600/700 dollars
rawdata.hist(column = 'price', bins = 50)
array([[<AxesSubplot:title={'center':'price'}>]], dtype=object)
# Checking the standard deviation (SD) of prices in the whole data and every neighbourhoods to understand the price fluctuation a bit better
rawdata.std()['price']
393.6950367331368
rawdata.groupby('neighbourhood').std()['price'].sort_values(ascending = False).reset_index()
| neighbourhood | price | |
|---|---|---|
| 0 | Whittlesea | 1259.922773 |
| 1 | Knox | 1129.133132 |
| 2 | Maroondah | 1053.229929 |
| 3 | Darebin | 802.592620 |
| 4 | Manningham | 635.127877 |
| 5 | Glen Eira | 588.234901 |
| 6 | Whitehorse | 585.543512 |
| 7 | Bayside | 510.302581 |
| 8 | Stonnington | 476.883184 |
| 9 | Greater Dandenong | 431.526582 |
| 10 | Port Phillip | 406.160226 |
| 11 | Maribyrnong | 361.999623 |
| 12 | Yarra Ranges | 295.654979 |
| 13 | Melbourne | 278.936028 |
| 14 | Monash | 265.949003 |
| 15 | Frankston | 246.733342 |
| 16 | Boroondara | 227.623887 |
| 17 | Nillumbik | 224.601992 |
| 18 | Kingston | 210.838388 |
| 19 | Moreland | 200.095529 |
| 20 | Moonee Valley | 193.072491 |
| 21 | Yarra | 190.976614 |
| 22 | Melton | 144.952006 |
| 23 | Hobsons Bay | 129.180803 |
| 24 | Cardinia | 122.732192 |
| 25 | Casey | 116.439770 |
| 26 | Banyule | 110.577281 |
| 27 | Hume | 107.999979 |
| 28 | Wyndham | 103.141798 |
| 29 | Brimbank | 98.586253 |
# Checking the number of listings that are below 560 dollars (which is mean price + SD)
rawdata.query('price < 560')
| id | name | host_id | host_name | neighbourhood_group | neighbourhood | latitude | longitude | room_type | price | minimum_nights | number_of_reviews | last_review | reviews_per_month | calculated_host_listings_count | availability_365 | number_of_reviews_ltm | license | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 9835 | Beautiful Room & House | 33057 | Manju | NaN | Manningham | -37.772470 | 145.092320 | Private room | 60 | 1 | 4 | 2015-09-12 | 0.03 | 1 | 365 | 0 | NaN |
| 1 | 12936 | St Kilda 1BR+BEACHSIDE+BALCONY+WIFI+AC | 50121 | The A2C Team | NaN | Port Phillip | -37.859990 | 144.976620 | Entire home/apt | 95 | 3 | 42 | 2016-01-25 | 0.74 | 10 | 0 | 0 | NaN |
| 2 | 38271 | Melbourne - Old Trafford Apartment | 164193 | Daryl & Dee | NaN | Casey | -38.057250 | 145.339360 | Entire home/apt | 100 | 1 | 162 | 2020-01-09 | 1.23 | 1 | 345 | 13 | NaN |
| 3 | 41836 | CLOSE TO CITY & MELBOURNE AIRPORT | 182833 | Diana | NaN | Darebin | -37.697290 | 145.000820 | Private room | 40 | 7 | 159 | 2017-08-24 | 1.72 | 2 | 0 | 0 | NaN |
| 4 | 43429 | Tranquil Javanese-Style Apartment in Oakleigh ... | 189684 | Allan | NaN | Monash | -37.899600 | 145.114470 | Entire home/apt | 99 | 2 | 214 | 2019-08-09 | 3.41 | 2 | 297 | 4 | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 18362 | 52136491 | Contemporary Spacious One BDR Apt in Southbank | 161697557 | Eachann | NaN | Melbourne | -37.826921 | 144.960394 | Entire home/apt | 80 | 180 | 0 | NaN | NaN | 64 | 357 | 0 | NaN |
| 18363 | 52136551 | Spacious One BDR Apt w/ Amazing Views | 161697557 | Eachann | NaN | Port Phillip | -37.828707 | 144.958652 | Entire home/apt | 70 | 180 | 0 | NaN | NaN | 64 | 191 | 0 | NaN |
| 18364 | 52137025 | Stylish Two BDR Apt at St Kilda Rd | 161697557 | Eachann | NaN | Melbourne | -37.846132 | 144.979593 | Entire home/apt | 90 | 180 | 0 | NaN | NaN | 64 | 223 | 0 | NaN |
| 18365 | 52137124 | Chic Two BDR Apt at St Kilda Rd | 161697557 | Eachann | NaN | Port Phillip | -37.847558 | 144.977845 | Entire home/apt | 100 | 180 | 0 | NaN | NaN | 64 | 358 | 0 | NaN |
| 18366 | 52137197 | Cozy Two BDR Apt at St Kilda Rd | 161697557 | Eachann | NaN | Melbourne | -37.846641 | 144.979528 | Entire home/apt | 88 | 180 | 0 | NaN | NaN | 64 | 358 | 0 | NaN |
17808 rows × 18 columns
# Visualising listing price 560 dollars (which is mean price + SD) data to get a better idea
rawdata.query('price < 560').hist(column = 'price', bins = 50)
array([[<AxesSubplot:title={'center':'price'}>]], dtype=object)
# To check if there any listing that has license
rawdata['neighbourhood_group'].nunique()
0
# To check if there any neighbouthood_group mentioned in the listing data
rawdata['license'].nunique()
0
# Identifying the host which has the maximum and minimum number of listings, the range of prices the hosts with highest listings offers
rawdata.groupby('host_id')['price'].agg(['min', 'max', 'mean', 'count']).sort_values('count', ascending = False).reset_index()
| host_id | min | max | mean | count | |
|---|---|---|---|---|---|
| 0 | 90729398 | 30 | 107 | 41.208333 | 144 |
| 1 | 279001183 | 73 | 485 | 134.916667 | 120 |
| 2 | 343442154 | 56 | 114 | 72.688312 | 77 |
| 3 | 284364084 | 75 | 220 | 102.528571 | 70 |
| 4 | 1739996 | 86 | 130 | 108.388060 | 67 |
| ... | ... | ... | ... | ... | ... |
| 11861 | 35748115 | 250 | 250 | 250.000000 | 1 |
| 11862 | 35752107 | 85 | 85 | 85.000000 | 1 |
| 11863 | 35811993 | 27 | 27 | 27.000000 | 1 |
| 11864 | 35826673 | 75 | 75 | 75.000000 | 1 |
| 11865 | 421766875 | 134 | 134 | 134.000000 | 1 |
11866 rows × 5 columns
# checking if hosts that have high listing price, also have other listings.
rawdata.groupby('host_id')['price'].agg(['min', 'max', 'mean', 'count']).sort_values('max', ascending = False).reset_index()
| host_id | min | max | mean | count | |
|---|---|---|---|---|---|
| 0 | 11914644 | 202 | 16267 | 1842.470588 | 51 |
| 1 | 63405705 | 15000 | 15000 | 15000.000000 | 1 |
| 2 | 96127123 | 13379 | 13379 | 13379.000000 | 1 |
| 3 | 172589130 | 142 | 13000 | 6571.000000 | 2 |
| 4 | 158245484 | 13000 | 13000 | 13000.000000 | 1 |
| ... | ... | ... | ... | ... | ... |
| 11861 | 48462471 | 14 | 14 | 14.000000 | 1 |
| 11862 | 210338742 | 14 | 14 | 14.000000 | 1 |
| 11863 | 46769195 | 13 | 13 | 13.000000 | 1 |
| 11864 | 35925193 | 13 | 13 | 13.000000 | 1 |
| 11865 | 175946909 | 0 | 0 | 0.000000 | 1 |
11866 rows × 5 columns
# Identifying hosts that have more than 1 listing, to check the number of serious/professional hosts
rawdata.groupby('host_id')['price'].agg(['min', 'max', 'mean', 'count']).sort_values('count', ascending=False).query('count > 1').reset_index()
| host_id | min | max | mean | count | |
|---|---|---|---|---|---|
| 0 | 90729398 | 30 | 107 | 41.208333 | 144 |
| 1 | 279001183 | 73 | 485 | 134.916667 | 120 |
| 2 | 343442154 | 56 | 114 | 72.688312 | 77 |
| 3 | 284364084 | 75 | 220 | 102.528571 | 70 |
| 4 | 1739996 | 86 | 130 | 108.388060 | 67 |
| ... | ... | ... | ... | ... | ... |
| 2065 | 7946139 | 270 | 270 | 270.000000 | 2 |
| 2066 | 293664322 | 31 | 39 | 35.000000 | 2 |
| 2067 | 76712367 | 106 | 151 | 128.500000 | 2 |
| 2068 | 256356480 | 82 | 83 | 82.500000 | 2 |
| 2069 | 51607871 | 75 | 193 | 134.000000 | 2 |
2070 rows × 5 columns
rawdata.groupby('neighbourhood')['price'].agg(['min', 'max', 'mean', 'count']).sort_values('count', ascending=False).query('count > 1').reset_index()
| neighbourhood | min | max | mean | count | |
|---|---|---|---|---|---|
| 0 | Melbourne | 15 | 13000 | 152.879846 | 5468 |
| 1 | Port Phillip | 15 | 16267 | 177.390583 | 2230 |
| 2 | Yarra | 15 | 5000 | 152.569307 | 1414 |
| 3 | Stonnington | 13 | 8888 | 190.729992 | 1237 |
| 4 | Yarra Ranges | 0 | 4357 | 305.680348 | 804 |
| 5 | Moreland | 18 | 2644 | 125.846253 | 774 |
| 6 | Monash | 16 | 3600 | 140.991379 | 580 |
| 7 | Darebin | 14 | 13379 | 165.333333 | 537 |
| 8 | Boroondara | 13 | 3374 | 162.285714 | 518 |
| 9 | Glen Eira | 19 | 13000 | 161.131373 | 510 |
| 10 | Whitehorse | 19 | 9856 | 153.983471 | 484 |
| 11 | Wyndham | 16 | 522 | 120.745011 | 451 |
| 12 | Maribyrnong | 16 | 7000 | 133.379310 | 406 |
| 13 | Bayside | 25 | 6145 | 307.758514 | 323 |
| 14 | Moonee Valley | 20 | 1990 | 133.152542 | 295 |
| 15 | Kingston | 28 | 1957 | 185.351351 | 259 |
| 16 | Manningham | 16 | 9856 | 177.791837 | 245 |
| 17 | Hobsons Bay | 25 | 1000 | 163.312821 | 195 |
| 18 | Banyule | 14 | 800 | 108.357513 | 193 |
| 19 | Frankston | 27 | 1600 | 207.574850 | 167 |
| 20 | Casey | 25 | 1000 | 115.736196 | 163 |
| 21 | Knox | 25 | 9999 | 262.077419 | 155 |
| 22 | Hume | 20 | 590 | 119.589041 | 146 |
| 23 | Whittlesea | 14 | 15000 | 192.624113 | 141 |
| 24 | Cardinia | 32 | 924 | 180.657143 | 140 |
| 25 | Greater Dandenong | 14 | 5000 | 144.518797 | 133 |
| 26 | Brimbank | 25 | 750 | 99.366412 | 131 |
| 27 | Melton | 18 | 900 | 142.752577 | 97 |
| 28 | Maroondah | 25 | 9999 | 258.112360 | 89 |
| 29 | Nillumbik | 20 | 1500 | 210.341463 | 82 |
Insights.2
Based on the insights of section 2, the raw data will be cleaned for further analysis
del rawdata["license"]
del rawdata["neighbourhood_group"]
data = rawdata.query('price > 0 & price < 560 & last_review > "2017-12-01" & last_review < "2019-12-31"').copy()
data
| id | name | host_id | host_name | neighbourhood | latitude | longitude | room_type | price | minimum_nights | number_of_reviews | last_review | reviews_per_month | calculated_host_listings_count | availability_365 | number_of_reviews_ltm | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 4 | 43429 | Tranquil Javanese-Style Apartment in Oakleigh ... | 189684 | Allan | Monash | -37.89960 | 145.11447 | Entire home/apt | 99 | 2 | 214 | 2019-08-09 | 3.41 | 2 | 297 | 4 |
| 7 | 66754 | Richmond CITY EDGE 60s COOL 1BR+WIFI+AC | 50121 | The A2C Team | Yarra | -37.82127 | 144.99408 | Entire home/apt | 94 | 3 | 70 | 2019-10-05 | 1.68 | 10 | 0 | 0 |
| 8 | 67211 | Kew Tranquility, Melbourne | 326880 | Kate | Boroondara | -37.80347 | 145.03583 | Private room | 45 | 2 | 176 | 2018-10-28 | 1.62 | 1 | 365 | 4 |
| 14 | 74959 | Blissful Beachside Port Melbourne Warehouse | 395864 | Linda | Port Phillip | -37.84098 | 144.93640 | Private room | 90 | 2 | 108 | 2018-01-24 | 0.86 | 1 | 218 | 0 |
| 15 | 78143 | 2 bedrooms-ideal for friends/family in quiet area | 419767 | Julie | Stonnington | -37.85162 | 144.98943 | Private room | 80 | 2 | 149 | 2019-03-18 | 1.72 | 1 | 365 | 4 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 13911 | 40973851 | Delightful Studio Apartment in Brunswick | 127111239 | Ian | Moreland | -37.76897 | 144.95135 | Entire home/apt | 80 | 14 | 1 | 2019-12-29 | 0.05 | 1 | 0 | 0 |
| 13914 | 40976256 | Cozy luxury +friendly multinational space in CBD | 319506475 | Javad | Melbourne | -37.80724 | 144.97191 | Shared room | 27 | 2 | 2 | 2019-12-29 | 0.10 | 3 | 0 | 0 |
| 13923 | 40989333 | Cozy luxury + friendly multinational space in CBD | 319506475 | Javad | Melbourne | -37.80850 | 144.97028 | Shared room | 21 | 3 | 1 | 2019-12-28 | 0.05 | 3 | 0 | 0 |
| 13936 | 41009326 | Private Double Room with En-suite, Great Location | 32877188 | Adam | Port Phillip | -37.87402 | 144.99772 | Private room | 75 | 1 | 1 | 2019-12-29 | 0.05 | 1 | 89 | 0 |
| 14005 | 41093261 | LUGGAGE STORAGES Near SKYBUS Southern Cross STN. | 302684901 | Rosie | Melbourne | -37.81614 | 144.95390 | Shared room | 15 | 1 | 1 | 2019-12-28 | 0.05 | 3 | 0 | 0 |
4852 rows × 16 columns
Insights.3
Further cleaning and extracting more information from the existing dataset and store in a new columns
# trying to get the data for average stay length. However the data seems to be skewed due to a few outlyers
data['minimum_nights'].agg(['min', 'max', 'mean'])
min 1.000000 max 1125.000000 mean 6.558533 Name: minimum_nights, dtype: float64
# Seems like only Melbourne has a listing that requires 1125 minimum nights which seems to be typo
data.groupby('neighbourhood')['minimum_nights'].agg(['min', 'max', 'mean']).sort_values('max', ascending = False).head(5).reset_index()
| neighbourhood | min | max | mean | |
|---|---|---|---|---|
| 0 | Melbourne | 1 | 1125 | 10.176056 |
| 1 | Boroondara | 1 | 365 | 6.285714 |
| 2 | Yarra | 1 | 365 | 5.186480 |
| 3 | Bayside | 1 | 365 | 7.779221 |
| 4 | Port Phillip | 1 | 365 | 7.684295 |
# Removing the outlyers in minimum_nights data helped
data.query('minimum_nights < 366').groupby('neighbourhood')['minimum_nights'].agg(['mean']).sort_values('mean', ascending=False).head(5).reset_index()
| neighbourhood | mean | |
|---|---|---|
| 0 | Bayside | 7.779221 |
| 1 | Port Phillip | 7.684295 |
| 2 | Moreland | 6.578261 |
| 3 | Melbourne | 6.299363 |
| 4 | Boroondara | 6.285714 |
def calculate_occupancy_rate(number_of_reviews, minimum_nights):
# review rate of 50% is assumed and average stay length is assumed to be 3 as mode of all district wise minimum nights mean is 3.
review_rate = 0.5
days_in_month = 31
average_stay_length_assumption = 3
max_occupancy = 0.7
# calculate occupancy
average_stay_length = minimum_nights.clip(lower = average_stay_length_assumption)
occupancy_per_month = number_of_reviews / review_rate * average_stay_length / days_in_month
# cap occupancy at max_occupancy
exceed_max_occupancy = occupancy_per_month > max_occupancy
occupancy_per_month.loc[exceed_max_occupancy] = 0.7
# fill missing values with a zero (0)
occupancy_per_month_fillna = occupancy_per_month.fillna(0)
return occupancy_per_month_fillna
# calculating occupancy rate and monthly rental income
data['occupancy_rate'] = calculate_occupancy_rate(data['reviews_per_month'], data['minimum_nights'])
data['monthly_rental_income'] = data['price'] * data['occupancy_rate'] * 31
# plotting graph to check the distribution of data
data.hist(column = 'occupancy_rate', bins = 50)
array([[<AxesSubplot:title={'center':'occupancy_rate'}>]], dtype=object)
# Average monthly rental income for Melbourne 2 year before the pandemic was 739 dollars
data['monthly_rental_income'].agg(['mean','max','min'])
mean 739.372943 max 10850.000000 min 1.920000 Name: monthly_rental_income, dtype: float64
# The total monthly rental income of all the hosts in Melbounse 2 years before the pandemic was 3,587,437 dollars
data['monthly_rental_income'].agg(['sum'])
sum 3587437.52 Name: monthly_rental_income, dtype: float64
# identifying the hosts who has the highest monthly rental income.
data.groupby('host_id')['monthly_rental_income'].agg('sum').sort_values(ascending = False)
host_id
9028154 27679.14
4170460 26672.70
1739996 26226.98
30900122 17610.44
93930367 17106.42
...
160057309 3.06
170503763 3.00
43953621 3.00
120650230 3.00
50094176 1.92
Name: monthly_rental_income, Length: 3938, dtype: float64
# Perhaps because Joe has 13 listings where guests have stayed in and multiple listings that have highest occupancy rate
data.query('host_id == "9028154"')
| id | name | host_id | host_name | neighbourhood | latitude | longitude | room_type | price | minimum_nights | number_of_reviews | last_review | reviews_per_month | calculated_host_listings_count | availability_365 | number_of_reviews_ltm | occupancy_rate | monthly_rental_income | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 318 | 1742705 | "Ten" - Pearl Apartments | 9028154 | Joe | Stonnington | -37.83888 | 144.99477 | Entire home/apt | 140 | 2 | 63 | 2019-12-06 | 1.09 | 29 | 351 | 1 | 0.210968 | 915.60 |
| 476 | 2426659 | "Beach House" - Pearl Apartments | 9028154 | Joe | Port Phillip | -37.86327 | 144.97430 | Entire home/apt | 80 | 14 | 78 | 2018-03-28 | 1.11 | 29 | 351 | 0 | 0.700000 | 1736.00 |
| 712 | 3878476 | "Alfred Square" - Pearl Apartments | 9028154 | Joe | Port Phillip | -37.86327 | 144.97430 | Entire home/apt | 160 | 365 | 77 | 2019-12-13 | 0.91 | 29 | 351 | 0 | 0.700000 | 3472.00 |
| 918 | 4755721 | "Palais" - Pearl Apartments | 9028154 | Joe | Port Phillip | -37.86686 | 144.97842 | Entire home/apt | 133 | 2 | 73 | 2019-10-08 | 0.92 | 29 | 351 | 0 | 0.178065 | 734.16 |
| 2384 | 10028048 | "Carlisle" - Pearl Apartments | 9028154 | Joe | Port Phillip | -37.86686 | 144.97842 | Entire home/apt | 200 | 299 | 34 | 2019-10-06 | 0.49 | 29 | 351 | 0 | 0.700000 | 4340.00 |
| 3668 | 14112295 | "Sapphire 2 bedroom" - Pearl Apartments | 9028154 | Joe | Port Phillip | -37.86676 | 144.97858 | Entire home/apt | 150 | 2 | 6 | 2019-02-21 | 0.11 | 29 | 351 | 0 | 0.021290 | 99.00 |
| 3675 | 14143944 | "Alexandra" - Pearl Apartments | 9028154 | Joe | Stonnington | -37.83646 | 144.99426 | Entire home/apt | 133 | 2 | 49 | 2019-03-21 | 0.81 | 29 | 351 | 1 | 0.156774 | 646.38 |
| 4622 | 17275221 | "Acland St Courtyard" - Pearl Apartments | 9028154 | Joe | Port Phillip | -37.86321 | 144.97446 | Entire home/apt | 151 | 300 | 17 | 2018-12-01 | 0.31 | 29 | 351 | 0 | 0.700000 | 3276.70 |
| 4869 | 18060185 | "Studio 206" - Pearl Apartments | 9028154 | Joe | Port Phillip | -37.86679 | 144.97832 | Entire home/apt | 250 | 360 | 13 | 2019-11-06 | 0.26 | 29 | 351 | 0 | 0.700000 | 5425.00 |
| 5967 | 20990318 | "Sapphire 1 bedroom" - Pearl Apartments | 9028154 | Joe | Port Phillip | -37.86679 | 144.97832 | Entire home/apt | 400 | 14 | 4 | 2019-01-30 | 0.13 | 29 | 351 | 0 | 0.117419 | 1456.00 |
| 8727 | 27437899 | "Shakespeare Grove" - Pearl Apartments | 9028154 | Joe | Port Phillip | -37.86376 | 144.97562 | Entire home/apt | 151 | 5 | 1 | 2019-03-04 | 0.03 | 29 | 351 | 0 | 0.009677 | 45.30 |
| 9206 | 29108049 | "Rhapsody" - Pearl Apartments | 9028154 | Joe | Port Phillip | -37.84791 | 144.97783 | Entire home/apt | 150 | 2 | 3 | 2019-10-13 | 0.12 | 29 | 166 | 0 | 0.023226 | 108.00 |
| 10217 | 31549462 | "Studio 306" - Pearl Apartments | 9028154 | Joe | Port Phillip | -37.86601 | 144.97768 | Entire home/apt | 250 | 356 | 4 | 2019-09-23 | 0.14 | 29 | 351 | 0 | 0.700000 | 5425.00 |
# Understanding monthly rental income per neighbourhood
data.groupby('neighbourhood')['monthly_rental_income'].agg(['min', 'max', 'mean']).sort_values('mean', ascending=False).reset_index()
| neighbourhood | min | max | mean | |
|---|---|---|---|---|
| 0 | Yarra Ranges | 19.80 | 8528.10 | 2252.517671 |
| 1 | Cardinia | 16.80 | 6401.50 | 1160.553636 |
| 2 | Nillumbik | 36.00 | 3949.40 | 970.833000 |
| 3 | Manningham | 1.92 | 6923.70 | 923.416901 |
| 4 | Melbourne | 3.00 | 10850.00 | 884.883042 |
| 5 | Melton | 16.20 | 3780.00 | 788.017000 |
| 6 | Maroondah | 16.80 | 3146.50 | 763.046364 |
| 7 | Yarra | 5.40 | 6510.00 | 743.127879 |
| 8 | Stonnington | 4.20 | 6878.90 | 739.052348 |
| 9 | Port Phillip | 6.00 | 8680.00 | 723.264038 |
| 10 | Kingston | 7.20 | 3580.50 | 679.469091 |
| 11 | Hobsons Bay | 5.88 | 6804.00 | 668.250175 |
| 12 | Bayside | 14.40 | 6608.00 | 663.538182 |
| 13 | Frankston | 4.08 | 3580.50 | 649.738710 |
| 14 | Maribyrnong | 8.16 | 7539.48 | 628.014797 |
| 15 | Moreland | 5.04 | 8094.10 | 572.373043 |
| 16 | Wyndham | 5.40 | 5830.50 | 544.421702 |
| 17 | Moonee Valley | 5.76 | 7595.00 | 516.576712 |
| 18 | Hume | 3.00 | 3211.60 | 511.579048 |
| 19 | Boroondara | 3.00 | 3969.00 | 499.387532 |
| 20 | Knox | 3.96 | 5054.40 | 477.922800 |
| 21 | Darebin | 7.20 | 4664.00 | 447.272442 |
| 22 | Brimbank | 5.16 | 3540.60 | 435.386429 |
| 23 | Banyule | 7.00 | 2213.40 | 411.837308 |
| 24 | Glen Eira | 3.24 | 3861.00 | 400.226111 |
| 25 | Monash | 3.36 | 2777.60 | 320.726667 |
| 26 | Whitehorse | 5.16 | 3859.20 | 311.919469 |
| 27 | Whittlesea | 4.20 | 1524.78 | 254.828000 |
| 28 | Casey | 9.60 | 1318.20 | 201.488889 |
| 29 | Greater Dandenong | 4.80 | 835.20 | 136.010000 |
# Although Yarra ranges has the highest monthly rental income but since Melbourne has so many ratings it has even higher sum of all monthly rental income
data.groupby('neighbourhood')['monthly_rental_income'].agg(['sum']).sort_values('sum', ascending=False).reset_index()
| neighbourhood | sum | |
|---|---|---|
| 0 | Melbourne | 1256533.92 |
| 1 | Port Phillip | 451316.76 |
| 2 | Yarra Ranges | 328867.58 |
| 3 | Yarra | 318801.86 |
| 4 | Stonnington | 254973.06 |
| 5 | Moreland | 131645.80 |
| 6 | Maribyrnong | 77245.82 |
| 7 | Darebin | 76930.86 |
| 8 | Boroondara | 76905.68 |
| 9 | Manningham | 65562.60 |
| 10 | Glen Eira | 57632.56 |
| 11 | Wyndham | 51175.64 |
| 12 | Bayside | 51092.44 |
| 13 | Monash | 46184.64 |
| 14 | Hobsons Bay | 38090.26 |
| 15 | Moonee Valley | 37710.10 |
| 16 | Kingston | 37370.80 |
| 17 | Whitehorse | 35246.90 |
| 18 | Cardinia | 25532.18 |
| 19 | Knox | 23896.14 |
| 20 | Hume | 21486.32 |
| 21 | Banyule | 21415.54 |
| 22 | Frankston | 20141.90 |
| 23 | Nillumbik | 19416.66 |
| 24 | Maroondah | 16787.02 |
| 25 | Melton | 15760.34 |
| 26 | Brimbank | 12190.82 |
| 27 | Whittlesea | 6370.70 |
| 28 | Greater Dandenong | 5712.42 |
| 29 | Casey | 5440.20 |
# Creating a variable listing top-mid-lowest 2 earning neighbourhoods to create graphical visualization
Top_Mid_bottom_neighbourhoods = ['Yarra Ranges', 'Cardinia', 'Moreland', 'Wyndham', 'Casey', 'Greater Dandenong']
for value in Top_Mid_bottom_neighbourhoods:
print(value)
Yarra Ranges Cardinia Moreland Wyndham Casey Greater Dandenong
# Using for loop to understand the pattern in the neighboughoods with the highest, medium and lowest monthly income (top2, mid 2 and lower 2)
Top_Mid_bottom_neighbourhoods = ['Yarra Ranges', 'Cardinia', 'Moreland', 'Wyndham', 'Casey', 'Greater Dandenong']
for value in Top_Mid_bottom_neighbourhoods:
data.query('neighbourhood == @value').hist(column ='monthly_rental_income', bins =50)
Insights.4
Gaining deeper insights into our cleaned Melbourne data through visual plotting
# reviewing the name of each column heading
data.columns
Index(['id', 'name', 'host_id', 'host_name', 'neighbourhood', 'latitude',
'longitude', 'room_type', 'price', 'minimum_nights',
'number_of_reviews', 'last_review', 'reviews_per_month',
'calculated_host_listings_count', 'availability_365',
'number_of_reviews_ltm', 'occupancy_rate', 'monthly_rental_income'],
dtype='object')
chart = alt.Chart(data)
# Checking if the room type influences the monthly rental income depending on occupancy rate
chart.mark_point().encode(
x = 'occupancy_rate',
y = 'monthly_rental_income', color = "room_type"
)
# Visualizing how room types influences monthly rental income in each neighbourhood
chart.mark_point().encode(
x = 'neighbourhood',
y = 'monthly_rental_income', color = "room_type"
)
# neighbourhoods with high occupancy rates sorted from highest occupancy rate (left to right)
chart.mark_bar().encode(x = alt.X('neighbourhood', sort = '-y'), y = 'mean(occupancy_rate)')
# Number of listings per neighboughood
chart.mark_bar().encode(x = alt.X('neighbourhood', sort = 'y'), y = 'count(calculated_host_listings_count)')
# Average monthly rental income per neighbourhood
chart.mark_bar().encode(x = alt.X('neighbourhood', sort = '-y'), y = 'mean(monthly_rental_income)', tooltip=['neighbourhood', 'mean(monthly_rental_income)', 'count()'])
Insights.5
To understand the geographic data, make changes and add columns for further analysis
# similar to the Melbourne's listings data set perhaps in geojson data set there is no neighbourhood_group data
rawneighbourhoods['neighbourhood_group'].nunique()
0
# deleting neighbourhood_groups as there is no data in it
del rawneighbourhoods["neighbourhood_group"]
# creating a clean version of neighbourhoods data
neighbourhoods = rawneighbourhoods.copy()
neighbourhoods
| neighbourhood | geometry | |
|---|---|---|
| 0 | Greater Dandenong | MULTIPOLYGON (((145.14170 -37.93139, 145.14255... |
| 1 | Wyndham | MULTIPOLYGON (((144.82749 -37.82280, 144.82654... |
| 2 | Hume | MULTIPOLYGON (((144.67238 -37.56779, 144.67127... |
| 3 | Hobsons Bay | MULTIPOLYGON (((144.90275 -37.84707, 144.90339... |
| 4 | Kingston | MULTIPOLYGON (((145.14170 -37.93139, 145.14190... |
| 5 | Maribyrnong | MULTIPOLYGON (((144.90263 -37.78966, 144.90261... |
| 6 | Monash | MULTIPOLYGON (((145.14170 -37.93139, 145.13654... |
| 7 | Nillumbik | MULTIPOLYGON (((145.06784 -37.68664, 145.06824... |
| 8 | Whittlesea | MULTIPOLYGON (((145.06784 -37.68664, 145.06767... |
| 9 | Yarra | MULTIPOLYGON (((145.04020 -37.78422, 145.04014... |
| 10 | Bayside | MULTIPOLYGON (((144.99705 -37.88387, 145.00104... |
| 11 | Moonee Valley | MULTIPOLYGON (((144.90263 -37.78966, 144.90267... |
| 12 | Casey | MULTIPOLYGON (((145.22919 -37.95208, 145.23002... |
| 13 | Knox | MULTIPOLYGON (((145.22919 -37.95208, 145.22908... |
| 14 | Glen Eira | MULTIPOLYGON (((144.99705 -37.88387, 144.99675... |
| 15 | Moreland | MULTIPOLYGON (((144.93708 -37.77779, 144.93692... |
| 16 | Whitehorse | MULTIPOLYGON (((145.19571 -37.86517, 145.19532... |
| 17 | Maroondah | MULTIPOLYGON (((145.21353 -37.81177, 145.21369... |
| 18 | Manningham | MULTIPOLYGON (((145.28957 -37.76282, 145.28924... |
| 19 | Melton | MULTIPOLYGON (((144.75088 -37.81042, 144.75078... |
| 20 | Yarra Ranges | MULTIPOLYGON (((145.76502 -37.89858, 145.76055... |
| 21 | Boroondara | MULTIPOLYGON (((145.00504 -37.80530, 145.00527... |
| 22 | Stonnington | MULTIPOLYGON (((145.05624 -37.85293, 145.05626... |
| 23 | Port Phillip | MULTIPOLYGON (((144.99705 -37.88387, 144.99226... |
| 24 | Melbourne | MULTIPOLYGON (((144.93708 -37.77779, 144.93734... |
| 25 | Cardinia | MULTIPOLYGON (((145.60569 -38.33243, 145.59361... |
| 26 | Banyule | MULTIPOLYGON (((145.13566 -37.74089, 145.13561... |
| 27 | Darebin | MULTIPOLYGON (((144.98852 -37.77641, 144.98852... |
| 28 | Brimbank | MULTIPOLYGON (((144.88846 -37.71082, 144.88823... |
| 29 | Frankston | MULTIPOLYGON (((145.12278 -38.08510, 145.12312... |
# checking the coordinate reference system (crs) of neighbourhoods data
neighbourhoods.crs
<Geographic 2D CRS: EPSG:4326> Name: WGS 84 Axis Info [ellipsoidal]: - Lat[north]: Geodetic latitude (degree) - Lon[east]: Geodetic longitude (degree) Area of Use: - name: World - bounds: (-180.0, -90.0, 180.0, 90.0) Datum: World Geodetic System 1984 - Ellipsoid: WGS 84 - Prime Meridian: Greenwich
# changing the crs of neighbourhoods from geographic to projected to calculate the area of each neighbourhood
mel_area = neighbourhoods.to_crs(epsg=28355)
mel_area.crs
<Projected CRS: EPSG:28355> Name: GDA94 / MGA zone 55 Axis Info [cartesian]: - E[east]: Easting (metre) - N[north]: Northing (metre) Area of Use: - name: Australia - 144°E to 150°E - bounds: (144.0, -50.89, 150.01, -9.23) Coordinate Operation: - name: Map Grid of Australia zone 55 - method: Transverse Mercator Datum: Geocentric Datum of Australia 1994 - Ellipsoid: GRS 1980 - Prime Meridian: Greenwich
# the area will be in meter sq as mentioned in the axis info
mel_area['area'] = mel_area.area
# adding a column converting the area from meter sq. to Km. sq.
mel_area ['square_km'] = mel_area ['area']*0.000001
mel_area
| neighbourhood | geometry | area | square_km | |
|---|---|---|---|---|
| 0 | Greater Dandenong | MULTIPOLYGON (((336687.440 5800169.466, 336751... | 1.294741e+08 | 129.474137 |
| 1 | Wyndham | MULTIPOLYGON (((308789.740 5811621.351, 308706... | 5.420908e+08 | 542.090767 |
| 2 | Hume | MULTIPOLYGON (((294433.004 5839590.635, 294332... | 5.033903e+08 | 503.390264 |
| 3 | Hobsons Bay | MULTIPOLYGON (((315475.137 5809079.945, 315533... | 6.454365e+07 | 64.543652 |
| 4 | Kingston | MULTIPOLYGON (((336687.440 5800169.466, 336704... | 9.162645e+07 | 91.626452 |
| 5 | Maribyrnong | MULTIPOLYGON (((315320.997 5815450.216, 315320... | 3.125081e+07 | 31.250806 |
| 6 | Monash | MULTIPOLYGON (((336687.440 5800169.466, 336232... | 8.147465e+07 | 81.474648 |
| 7 | Nillumbik | MULTIPOLYGON (((329633.909 5827195.036, 329667... | 4.321435e+08 | 432.143540 |
| 8 | Whittlesea | MULTIPOLYGON (((329633.909 5827195.036, 329619... | 4.897212e+08 | 489.721152 |
| 9 | Yarra | MULTIPOLYGON (((327423.121 5816316.806, 327418... | 1.955798e+07 | 19.557977 |
| 10 | Bayside | MULTIPOLYGON (((323860.647 5805179.294, 324222... | 3.744765e+07 | 37.447652 |
| 11 | Moonee Valley | MULTIPOLYGON (((315320.997 5815450.216, 315324... | 4.311595e+07 | 43.115949 |
| 12 | Casey | MULTIPOLYGON (((344420.749 5798023.373, 344489... | 3.968988e+08 | 396.898761 |
| 13 | Knox | MULTIPOLYGON (((344420.749 5798023.373, 344410... | 1.138618e+08 | 113.861794 |
| 14 | Glen Eira | MULTIPOLYGON (((323860.647 5805179.294, 323832... | 3.868911e+07 | 38.689109 |
| 15 | Moreland | MULTIPOLYGON (((318326.095 5816835.468, 318310... | 5.103969e+07 | 51.039693 |
| 16 | Whitehorse | MULTIPOLYGON (((341292.044 5807609.895, 341257... | 6.427161e+07 | 64.271608 |
| 17 | Maroondah | MULTIPOLYGON (((342746.255 5813566.341, 342758... | 6.139909e+07 | 61.399092 |
| 18 | Manningham | MULTIPOLYGON (((349341.063 5819122.802, 349311... | 1.133313e+08 | 113.331309 |
| 19 | Melton | MULTIPOLYGON (((302012.906 5812836.066, 302003... | 5.279453e+08 | 527.945346 |
| 20 | Yarra Ranges | MULTIPOLYGON (((391420.551 5804718.773, 391031... | 2.468177e+09 | 2468.176539 |
| 21 | Boroondara | MULTIPOLYGON (((324376.953 5813912.724, 324396... | 6.019108e+07 | 60.191081 |
| 22 | Stonnington | MULTIPOLYGON (((328994.890 5808722.486, 328996... | 2.563373e+07 | 25.633728 |
| 23 | Port Phillip | MULTIPOLYGON (((323860.647 5805179.294, 323438... | 2.107243e+07 | 21.072427 |
| 24 | Melbourne | MULTIPOLYGON (((318326.095 5816835.468, 318348... | 3.766908e+07 | 37.669076 |
| 25 | Cardinia | MULTIPOLYGON (((378135.176 5756380.138, 377075... | 1.281209e+09 | 1281.209135 |
| 26 | Banyule | MULTIPOLYGON (((335734.437 5821296.992, 335730... | 6.263126e+07 | 62.631260 |
| 27 | Darebin | MULTIPOLYGON (((322853.234 5817087.322, 322853... | 5.346941e+07 | 53.469414 |
| 28 | Brimbank | MULTIPOLYGON (((313876.280 5824171.167, 313856... | 1.234358e+08 | 123.435831 |
| 29 | Frankston | MULTIPOLYGON (((335368.616 5783079.537, 335397... | 1.294951e+08 | 129.495111 |
# the neighboughoods vary a lot in thier sizes. Yarra Ranges and Cardinia are the biggest while yarra and port philip are the smallest
mel_area.groupby('neighbourhood')['square_km'].agg(['max']).sort_values('max', ascending = False).reset_index()
| neighbourhood | max | |
|---|---|---|
| 0 | Yarra Ranges | 2468.176539 |
| 1 | Cardinia | 1281.209135 |
| 2 | Wyndham | 542.090767 |
| 3 | Melton | 527.945346 |
| 4 | Hume | 503.390264 |
| 5 | Whittlesea | 489.721152 |
| 6 | Nillumbik | 432.143540 |
| 7 | Casey | 396.898761 |
| 8 | Frankston | 129.495111 |
| 9 | Greater Dandenong | 129.474137 |
| 10 | Brimbank | 123.435831 |
| 11 | Knox | 113.861794 |
| 12 | Manningham | 113.331309 |
| 13 | Kingston | 91.626452 |
| 14 | Monash | 81.474648 |
| 15 | Hobsons Bay | 64.543652 |
| 16 | Whitehorse | 64.271608 |
| 17 | Banyule | 62.631260 |
| 18 | Maroondah | 61.399092 |
| 19 | Boroondara | 60.191081 |
| 20 | Darebin | 53.469414 |
| 21 | Moreland | 51.039693 |
| 22 | Moonee Valley | 43.115949 |
| 23 | Glen Eira | 38.689109 |
| 24 | Melbourne | 37.669076 |
| 25 | Bayside | 37.447652 |
| 26 | Maribyrnong | 31.250806 |
| 27 | Stonnington | 25.633728 |
| 28 | Port Phillip | 21.072427 |
| 29 | Yarra | 19.557977 |
# converting tabular data for mapping
raw_data_geo = geopandas.GeoDataFrame(rawdata, geometry = geopandas.points_from_xy(rawdata['longitude'], rawdata['latitude']), crs = 'EPSG:4326')
# projecting the raw uncleaned listings data to compare it with the cleaned data
raw_data_geo.plot()
<AxesSubplot:>
# converting tabular data into geographic data
data_geo = geopandas.GeoDataFrame(data, geometry = geopandas.points_from_xy(data['longitude'], data['latitude']), crs = 'EPSG:4326')
# There is not much difference to the raw data projection so we can use the cleaned version
data_geo.plot()
<AxesSubplot:>
# Converting the crs of neighboughood to the same crs of data_geo to perform spatial join
mel_area = mel_area.to_crs(data_geo.crs)
mel_area.crs
<Geographic 2D CRS: EPSG:4326> Name: WGS 84 Axis Info [ellipsoidal]: - Lat[north]: Geodetic latitude (degree) - Lon[east]: Geodetic longitude (degree) Area of Use: - name: World - bounds: (-180.0, -90.0, 180.0, 90.0) Datum: World Geodetic System 1984 - Ellipsoid: WGS 84 - Prime Meridian: Greenwich
data_geo.crs
<Geographic 2D CRS: EPSG:4326> Name: WGS 84 Axis Info [ellipsoidal]: - Lat[north]: Geodetic latitude (degree) - Lon[east]: Geodetic longitude (degree) Area of Use: - name: World - bounds: (-180.0, -90.0, 180.0, 90.0) Datum: World Geodetic System 1984 - Ellipsoid: WGS 84 - Prime Meridian: Greenwich
# now that both the data sets have the same crs, we can do the spatian join using .sjoin function
data_geo_neighbourhoods = geopandas.sjoin(mel_area, data_geo)
data_geo_neighbourhoods
| neighbourhood_left | geometry | area | square_km | index_right | id | name | host_id | host_name | neighbourhood_right | ... | price | minimum_nights | number_of_reviews | last_review | reviews_per_month | calculated_host_listings_count | availability_365 | number_of_reviews_ltm | occupancy_rate | monthly_rental_income | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Greater Dandenong | MULTIPOLYGON (((145.14170 -37.93139, 145.14255... | 1.294741e+08 | 129.474137 | 8281 | 25785222 | Temporary Home Away from Home | 64729270 | Sue | Greater Dandenong | ... | 33 | 1 | 1 | 2018-06-14 | 0.03 | 1 | 0 | 0 | 0.005806 | 5.94 |
| 0 | Greater Dandenong | MULTIPOLYGON (((145.14170 -37.93139, 145.14255... | 1.294741e+08 | 129.474137 | 7324 | 23127829 | Serene room for a single person or a couple. | 164348312 | Tanya | Greater Dandenong | ... | 45 | 2 | 1 | 2018-02-20 | 0.02 | 1 | 0 | 0 | 0.003871 | 5.40 |
| 0 | Greater Dandenong | MULTIPOLYGON (((145.14170 -37.93139, 145.14255... | 1.294741e+08 | 129.474137 | 8349 | 26029763 | Springvale South 1 Bedroom | 195617704 | Jenn | Greater Dandenong | ... | 39 | 2 | 2 | 2018-07-07 | 0.05 | 1 | 0 | 0 | 0.009677 | 11.70 |
| 0 | Greater Dandenong | MULTIPOLYGON (((145.14170 -37.93139, 145.14255... | 1.294741e+08 | 129.474137 | 12842 | 39323363 | Bedroom for 2 | 365556 | Rebecca | Greater Dandenong | ... | 30 | 1 | 4 | 2019-10-21 | 0.17 | 2 | 0 | 0 | 0.032903 | 30.60 |
| 0 | Greater Dandenong | MULTIPOLYGON (((145.14170 -37.93139, 145.14255... | 1.294741e+08 | 129.474137 | 6447 | 21791978 | Private room and bathroom | 149155932 | Diana | Greater Dandenong | ... | 50 | 2 | 7 | 2018-11-03 | 0.16 | 1 | 0 | 0 | 0.030968 | 48.00 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 29 | Frankston | MULTIPOLYGON (((145.12278 -38.08509, 145.12312... | 1.294951e+08 | 129.495111 | 6379 | 21684261 | “Deluxe- Seaford Palms “ Opposite beach and s... | 136108595 | Jacqui & Danny | Frankston | ... | 148 | 2 | 47 | 2019-05-07 | 1.10 | 1 | 115 | 17 | 0.212903 | 976.80 |
| 29 | Frankston | MULTIPOLYGON (((145.12278 -38.08509, 145.12312... | 1.294951e+08 | 129.495111 | 7417 | 23344680 | Home away from home | 7235212 | Tracey | Frankston | ... | 55 | 1 | 3 | 2018-11-03 | 0.07 | 1 | 0 | 0 | 0.013548 | 23.10 |
| 29 | Frankston | MULTIPOLYGON (((145.12278 -38.08509, 145.12312... | 1.294951e+08 | 129.495111 | 10350 | 31943671 | Light-footprint Hippy Haven - SINGLE | 7922332 | Zbd | Frankston | ... | 39 | 1 | 36 | 2019-12-02 | 1.61 | 2 | 37 | 10 | 0.311613 | 376.74 |
| 29 | Frankston | MULTIPOLYGON (((145.12278 -38.08509, 145.12312... | 1.294951e+08 | 129.495111 | 7072 | 22604425 | BEACH FRONT TOWNHOUSE | 36205101 | Rosemary | Frankston | ... | 160 | 2 | 18 | 2018-10-27 | 0.42 | 1 | 0 | 0 | 0.081290 | 403.20 |
| 29 | Frankston | MULTIPOLYGON (((145.12278 -38.08509, 145.12312... | 1.294951e+08 | 129.495111 | 5189 | 18916763 | Quietsville, 5 mins walk beach, hotel, shops | 15207756 | Steve | Frankston | ... | 38 | 1 | 147 | 2019-02-04 | 3.14 | 1 | 158 | 11 | 0.607742 | 715.92 |
4852 rows × 23 columns
# checking the name of all columns
data_geo_neighbourhoods.columns
Index(['neighbourhood_left', 'geometry', 'area', 'square_km', 'index_right',
'id', 'name', 'host_id', 'host_name', 'neighbourhood_right', 'latitude',
'longitude', 'room_type', 'price', 'minimum_nights',
'number_of_reviews', 'last_review', 'reviews_per_month',
'calculated_host_listings_count', 'availability_365',
'number_of_reviews_ltm', 'occupancy_rate', 'monthly_rental_income'],
dtype='object')
# checking if the neighbourhood_left and neighbourhood_right have the same data or not
data_geo_neighbourhoods['same_names'] = data_geo_neighbourhoods['neighbourhood_left'] == data_geo_neighbourhoods['neighbourhood_right']
# neighbourhood_left and neighbourhood_right have the same data
data_geo_neighbourhoods.groupby('same_names')['id'].agg('count')
same_names True 4852 Name: id, dtype: int64
# deleting the neighbourhood_right column
del data_geo_neighbourhoods['neighbourhood_right']
# renaming the neighbourhood_left column to neighbourhood to ensure that it matches with other datasets
data_geo_neighbourhoods = data_geo_neighbourhoods.rename(columns={"neighbourhood_left": "neighbourhood"}).copy()
# income and listings stats per neighbourhood
stats_per_neighbourhood = data_geo_neighbourhoods.groupby('neighbourhood')['monthly_rental_income'].agg(['count', 'mean', 'sum']).reset_index()
stats_per_neighbourhood.columns
Index(['neighbourhood', 'count', 'mean', 'sum'], dtype='object')
# chaning the names of the columns to make it understandable listings per neighbourhood (listingsPN), Average monthly rental income (AvgMRI) and TotalMRI (Total monthly rental income)
stats_per_neighbourhood = stats_per_neighbourhood.rename(columns={"count": "listingsPN"}).rename(columns={"mean": "AvgMRI"}).rename(columns={"sum": "TotalMRI"}).copy()
stats_per_neighbourhood.sort_values('listingsPN', ascending = False)
| neighbourhood | listingsPN | AvgMRI | TotalMRI | |
|---|---|---|---|---|
| 17 | Melbourne | 1420 | 884.883042 | 1256533.92 |
| 23 | Port Phillip | 624 | 723.264038 | 451316.76 |
| 28 | Yarra | 429 | 743.127879 | 318801.86 |
| 24 | Stonnington | 345 | 739.052348 | 254973.06 |
| 21 | Moreland | 230 | 572.373043 | 131645.80 |
| 6 | Darebin | 172 | 447.272442 | 76930.86 |
| 2 | Boroondara | 154 | 499.387532 | 76905.68 |
| 29 | Yarra Ranges | 146 | 2252.517671 | 328867.58 |
| 8 | Glen Eira | 144 | 400.226111 | 57632.56 |
| 19 | Monash | 144 | 320.726667 | 46184.64 |
| 15 | Maribyrnong | 123 | 628.014797 | 77245.82 |
| 25 | Whitehorse | 113 | 311.919469 | 35246.90 |
| 27 | Wyndham | 94 | 544.421702 | 51175.64 |
| 1 | Bayside | 77 | 663.538182 | 51092.44 |
| 20 | Moonee Valley | 73 | 516.576712 | 37710.10 |
| 14 | Manningham | 71 | 923.416901 | 65562.60 |
| 10 | Hobsons Bay | 57 | 668.250175 | 38090.26 |
| 12 | Kingston | 55 | 679.469091 | 37370.80 |
| 0 | Banyule | 52 | 411.837308 | 21415.54 |
| 13 | Knox | 50 | 477.922800 | 23896.14 |
| 9 | Greater Dandenong | 42 | 136.010000 | 5712.42 |
| 11 | Hume | 42 | 511.579048 | 21486.32 |
| 7 | Frankston | 31 | 649.738710 | 20141.90 |
| 3 | Brimbank | 28 | 435.386429 | 12190.82 |
| 5 | Casey | 27 | 201.488889 | 5440.20 |
| 26 | Whittlesea | 25 | 254.828000 | 6370.70 |
| 4 | Cardinia | 22 | 1160.553636 | 25532.18 |
| 16 | Maroondah | 22 | 763.046364 | 16787.02 |
| 18 | Melton | 20 | 788.017000 | 15760.34 |
| 22 | Nillumbik | 20 | 970.833000 | 19416.66 |
# stats for minimum listing price and average listing price per neighbourhood
pricestats_per_neighboughood = data_geo_neighbourhoods.groupby('neighbourhood')['price'].agg(['min','mean'])
pricestats_per_neighboughood = pricestats_per_neighboughood.rename(columns={"min": "minPrice"}).rename(columns={"mean": "AvgPrice"}).copy()
# Calculating average occupancy rates
occupancystats_per_neighbourhood = data_geo_neighbourhoods.groupby('neighbourhood')['occupancy_rate'].agg(['mean'])
occupancystats_per_neighbourhood = occupancystats_per_neighbourhood.rename(columns={"mean": "AvgOccupancyRate"})
occupancystats_per_neighbourhood.sort_values('AvgOccupancyRate', ascending = False).reset_index()
| neighbourhood | AvgOccupancyRate | |
|---|---|---|
| 0 | Yarra Ranges | 0.345890 |
| 1 | Melbourne | 0.219366 |
| 2 | Nillumbik | 0.209032 |
| 3 | Cardinia | 0.199560 |
| 4 | Stonnington | 0.189485 |
| 5 | Melton | 0.188226 |
| 6 | Hume | 0.186682 |
| 7 | Maribyrnong | 0.185492 |
| 8 | Moreland | 0.185377 |
| 9 | Maroondah | 0.185337 |
| 10 | Brimbank | 0.183733 |
| 11 | Frankston | 0.183080 |
| 12 | Yarra | 0.182854 |
| 13 | Kingston | 0.179097 |
| 14 | Port Phillip | 0.175984 |
| 15 | Knox | 0.171716 |
| 16 | Manningham | 0.169968 |
| 17 | Banyule | 0.167953 |
| 18 | Hobsons Bay | 0.158993 |
| 19 | Boroondara | 0.153079 |
| 20 | Moonee Valley | 0.142466 |
| 21 | Wyndham | 0.133734 |
| 22 | Darebin | 0.132982 |
| 23 | Bayside | 0.132576 |
| 24 | Glen Eira | 0.121129 |
| 25 | Monash | 0.106102 |
| 26 | Whitehorse | 0.101405 |
| 27 | Casey | 0.094194 |
| 28 | Whittlesea | 0.086710 |
| 29 | Greater Dandenong | 0.057634 |
data_geo_neighbourhoods.columns
Index(['neighbourhood', 'geometry', 'area', 'square_km', 'index_right', 'id',
'name', 'host_id', 'host_name', 'latitude', 'longitude', 'room_type',
'price', 'minimum_nights', 'number_of_reviews', 'last_review',
'reviews_per_month', 'calculated_host_listings_count',
'availability_365', 'number_of_reviews_ltm', 'occupancy_rate',
'monthly_rental_income', 'same_names'],
dtype='object')
# the most common room type in each neighbourhood is mostly either private room or entire apartment
room_type_mode = data_geo_neighbourhoods.groupby('neighbourhood')['room_type'].agg([pd.Series.mode]).reset_index()
Most_common_room_type = room_type_mode.rename(columns={"mode": "most_common_room_type"}).reset_index()
Most_common_room_type.sort_values(by = 'most_common_room_type', ascending = False)
| index | neighbourhood | most_common_room_type | |
|---|---|---|---|
| 0 | 0 | Banyule | Private room |
| 9 | 9 | Greater Dandenong | Private room |
| 19 | 19 | Monash | Private room |
| 18 | 18 | Melton | Private room |
| 14 | 14 | Manningham | Private room |
| 13 | 13 | Knox | Private room |
| 25 | 25 | Whitehorse | Private room |
| 11 | 11 | Hume | Private room |
| 26 | 26 | Whittlesea | Private room |
| 8 | 8 | Glen Eira | Private room |
| 7 | 7 | Frankston | Private room |
| 27 | 27 | Wyndham | Private room |
| 5 | 5 | Casey | Private room |
| 4 | 4 | Cardinia | Private room |
| 3 | 3 | Brimbank | Private room |
| 20 | 20 | Moonee Valley | Private room |
| 21 | 21 | Moreland | Entire home/apt |
| 24 | 24 | Stonnington | Entire home/apt |
| 23 | 23 | Port Phillip | Entire home/apt |
| 22 | 22 | Nillumbik | Entire home/apt |
| 28 | 28 | Yarra | Entire home/apt |
| 15 | 15 | Maribyrnong | Entire home/apt |
| 17 | 17 | Melbourne | Entire home/apt |
| 16 | 16 | Maroondah | Entire home/apt |
| 1 | 1 | Bayside | Entire home/apt |
| 12 | 12 | Kingston | Entire home/apt |
| 10 | 10 | Hobsons Bay | Entire home/apt |
| 6 | 6 | Darebin | Entire home/apt |
| 2 | 2 | Boroondara | Entire home/apt |
| 29 | 29 | Yarra Ranges | Entire home/apt |
# room_type percentage distribution
room_type_count = data_geo_neighbourhoods.groupby(['neighbourhood','room_type'])['id'].agg(['count'])
total_listings = data_geo_neighbourhoods.groupby('neighbourhood')['id'].agg(['count'])
room_type_percentage_count = room_type_count / total_listings * 100
room_type_percentage = room_type_percentage_count.rename(columns={"count": "%"}).reset_index()
room_type_percentage.query('(neighbourhood == "Melbourne" | neighbourhood == "Yarra" | neighbourhood == "Cardinia")')
| neighbourhood | room_type | % | |
|---|---|---|---|
| 12 | Cardinia | Entire home/apt | 45.454545 |
| 13 | Cardinia | Private room | 54.545455 |
| 47 | Melbourne | Entire home/apt | 66.619718 |
| 48 | Melbourne | Hotel room | 1.619718 |
| 49 | Melbourne | Private room | 29.154930 |
| 50 | Melbourne | Shared room | 2.605634 |
| 81 | Yarra | Entire home/apt | 74.125874 |
| 82 | Yarra | Private room | 25.407925 |
| 83 | Yarra | Shared room | 0.466200 |
neighbourhood_stats = mel_area.merge(stats_per_neighbourhood, on = 'neighbourhood').merge(pricestats_per_neighboughood, on = 'neighbourhood').merge(occupancystats_per_neighbourhood, on = 'neighbourhood').merge(Most_common_room_type, on = 'neighbourhood')
neighbourhood_stats['ListingsDensity'] = neighbourhood_stats['listingsPN']/neighbourhood_stats['square_km']
neighbourhood_stats.copy()
| neighbourhood | geometry | area | square_km | listingsPN | AvgMRI | TotalMRI | minPrice | AvgPrice | AvgOccupancyRate | index | most_common_room_type | ListingsDensity | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Greater Dandenong | MULTIPOLYGON (((145.14170 -37.93139, 145.14255... | 1.294741e+08 | 129.474137 | 42 | 136.010000 | 5712.42 | 20 | 104.833333 | 0.057634 | 9 | Private room | 0.324389 |
| 1 | Wyndham | MULTIPOLYGON (((144.82749 -37.82280, 144.82654... | 5.420908e+08 | 542.090767 | 94 | 544.421702 | 51175.64 | 20 | 108.500000 | 0.133734 | 27 | Private room | 0.173403 |
| 2 | Hume | MULTIPOLYGON (((144.67238 -37.56779, 144.67127... | 5.033903e+08 | 503.390264 | 42 | 511.579048 | 21486.32 | 25 | 102.904762 | 0.186682 | 11 | Private room | 0.083434 |
| 3 | Hobsons Bay | MULTIPOLYGON (((144.90275 -37.84707, 144.90339... | 6.454365e+07 | 64.543652 | 57 | 668.250175 | 38090.26 | 25 | 145.578947 | 0.158993 | 10 | Entire home/apt | 0.883123 |
| 4 | Kingston | MULTIPOLYGON (((145.14170 -37.93139, 145.14190... | 9.162645e+07 | 91.626452 | 55 | 679.469091 | 37370.80 | 28 | 132.345455 | 0.179097 | 12 | Entire home/apt | 0.600263 |
| 5 | Maribyrnong | MULTIPOLYGON (((144.90263 -37.78966, 144.90261... | 3.125081e+07 | 31.250806 | 123 | 628.014797 | 77245.82 | 27 | 100.186992 | 0.185492 | 15 | Entire home/apt | 3.935898 |
| 6 | Monash | MULTIPOLYGON (((145.14170 -37.93139, 145.13654... | 8.147465e+07 | 81.474648 | 144 | 320.726667 | 46184.64 | 25 | 90.270833 | 0.106102 | 19 | Private room | 1.767421 |
| 7 | Nillumbik | MULTIPOLYGON (((145.06784 -37.68664, 145.06824... | 4.321435e+08 | 432.143540 | 20 | 970.833000 | 19416.66 | 50 | 142.250000 | 0.209032 | 22 | Entire home/apt | 0.046281 |
| 8 | Whittlesea | MULTIPOLYGON (((145.06784 -37.68664, 145.06767... | 4.897212e+08 | 489.721152 | 25 | 254.828000 | 6370.70 | 30 | 85.880000 | 0.086710 | 26 | Private room | 0.051049 |
| 9 | Yarra | MULTIPOLYGON (((145.04020 -37.78422, 145.04014... | 1.955798e+07 | 19.557977 | 429 | 743.127879 | 318801.86 | 20 | 133.911422 | 0.182854 | 28 | Entire home/apt | 21.934784 |
| 10 | Bayside | MULTIPOLYGON (((144.99705 -37.88387, 145.00104... | 3.744765e+07 | 37.447652 | 77 | 663.538182 | 51092.44 | 35 | 185.181818 | 0.132576 | 1 | Entire home/apt | 2.056204 |
| 11 | Moonee Valley | MULTIPOLYGON (((144.90263 -37.78966, 144.90267... | 4.311595e+07 | 43.115949 | 73 | 516.576712 | 37710.10 | 27 | 106.479452 | 0.142466 | 20 | Private room | 1.693109 |
| 12 | Casey | MULTIPOLYGON (((145.22919 -37.95208, 145.23002... | 3.968988e+08 | 396.898761 | 27 | 201.488889 | 5440.20 | 29 | 85.888889 | 0.094194 | 5 | Private room | 0.068027 |
| 13 | Knox | MULTIPOLYGON (((145.22919 -37.95208, 145.22908... | 1.138618e+08 | 113.861794 | 50 | 477.922800 | 23896.14 | 25 | 83.700000 | 0.171716 | 13 | Private room | 0.439129 |
| 14 | Glen Eira | MULTIPOLYGON (((144.99705 -37.88387, 144.99675... | 3.868911e+07 | 38.689109 | 144 | 400.226111 | 57632.56 | 20 | 108.534722 | 0.121129 | 8 | Private room | 3.721978 |
| 15 | Moreland | MULTIPOLYGON (((144.93708 -37.77779, 144.93692... | 5.103969e+07 | 51.039693 | 230 | 572.373043 | 131645.80 | 24 | 101.804348 | 0.185377 | 21 | Entire home/apt | 4.506297 |
| 16 | Whitehorse | MULTIPOLYGON (((145.19571 -37.86517, 145.19532... | 6.427161e+07 | 64.271608 | 113 | 311.919469 | 35246.90 | 20 | 93.070796 | 0.101405 | 25 | Private room | 1.758164 |
| 17 | Maroondah | MULTIPOLYGON (((145.21353 -37.81177, 145.21369... | 6.139909e+07 | 61.399092 | 22 | 763.046364 | 16787.02 | 35 | 109.545455 | 0.185337 | 16 | Entire home/apt | 0.358311 |
| 18 | Manningham | MULTIPOLYGON (((145.28957 -37.76282, 145.28924... | 1.133313e+08 | 113.331309 | 71 | 923.416901 | 65562.60 | 16 | 143.281690 | 0.169968 | 14 | Private room | 0.626482 |
| 19 | Melton | MULTIPOLYGON (((144.75088 -37.81042, 144.75078... | 5.279453e+08 | 527.945346 | 20 | 788.017000 | 15760.34 | 18 | 96.050000 | 0.188226 | 18 | Private room | 0.037883 |
| 20 | Yarra Ranges | MULTIPOLYGON (((145.76502 -37.89858, 145.76055... | 2.468177e+09 | 2468.176539 | 146 | 2252.517671 | 328867.58 | 30 | 220.623288 | 0.345890 | 29 | Entire home/apt | 0.059153 |
| 21 | Boroondara | MULTIPOLYGON (((145.00504 -37.80530, 145.00527... | 6.019108e+07 | 60.191081 | 154 | 499.387532 | 76905.68 | 19 | 103.149351 | 0.153079 | 2 | Entire home/apt | 2.558519 |
| 22 | Stonnington | MULTIPOLYGON (((145.05624 -37.85293, 145.05626... | 2.563373e+07 | 25.633728 | 345 | 739.052348 | 254973.06 | 23 | 127.153623 | 0.189485 | 24 | Entire home/apt | 13.458831 |
| 23 | Port Phillip | MULTIPOLYGON (((144.99705 -37.88387, 144.99226... | 2.107243e+07 | 21.072427 | 624 | 723.264038 | 451316.76 | 15 | 130.350962 | 0.175984 | 23 | Entire home/apt | 29.612157 |
| 24 | Melbourne | MULTIPOLYGON (((144.93708 -37.77779, 144.93734... | 3.766908e+07 | 37.669076 | 1420 | 884.883042 | 1256533.92 | 15 | 123.599296 | 0.219366 | 17 | Entire home/apt | 37.696704 |
| 25 | Cardinia | MULTIPOLYGON (((145.60569 -38.33243, 145.59361... | 1.281209e+09 | 1281.209135 | 22 | 1160.553636 | 25532.18 | 32 | 145.272727 | 0.199560 | 4 | Private room | 0.017171 |
| 26 | Banyule | MULTIPOLYGON (((145.13566 -37.74089, 145.13561... | 6.263126e+07 | 62.631260 | 52 | 411.837308 | 21415.54 | 23 | 84.250000 | 0.167953 | 0 | Private room | 0.830256 |
| 27 | Darebin | MULTIPOLYGON (((144.98852 -37.77641, 144.98852... | 5.346941e+07 | 53.469414 | 172 | 447.272442 | 76930.86 | 22 | 110.226744 | 0.132982 | 6 | Entire home/apt | 3.216792 |
| 28 | Brimbank | MULTIPOLYGON (((144.88846 -37.71082, 144.88823... | 1.234358e+08 | 123.435831 | 28 | 435.386429 | 12190.82 | 25 | 71.750000 | 0.183733 | 3 | Private room | 0.226839 |
| 29 | Frankston | MULTIPOLYGON (((145.12278 -38.08509, 145.12312... | 1.294951e+08 | 129.495111 | 31 | 649.738710 | 20141.90 | 27 | 120.387097 | 0.183080 | 7 | Private room | 0.239391 |
neighbourhood_stats.sort_values('ListingsDensity', ascending = False).head(3)
| neighbourhood | geometry | area | square_km | listingsPN | AvgMRI | TotalMRI | minPrice | AvgPrice | AvgOccupancyRate | index | most_common_room_type | ListingsDensity | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 24 | Melbourne | MULTIPOLYGON (((144.93708 -37.77779, 144.93734... | 3.766908e+07 | 37.669076 | 1420 | 884.883042 | 1256533.92 | 15 | 123.599296 | 0.219366 | 17 | Entire home/apt | 37.696704 |
| 23 | Port Phillip | MULTIPOLYGON (((144.99705 -37.88387, 144.99226... | 2.107243e+07 | 21.072427 | 624 | 723.264038 | 451316.76 | 15 | 130.350962 | 0.175984 | 23 | Entire home/apt | 29.612157 |
| 9 | Yarra | MULTIPOLYGON (((145.04020 -37.78422, 145.04014... | 1.955798e+07 | 19.557977 | 429 | 743.127879 | 318801.86 | 20 | 133.911422 | 0.182854 | 28 | Entire home/apt | 21.934784 |
neighbourhood_stats.sort_values('AvgPrice', ascending = False).tail(3)
| neighbourhood | geometry | area | square_km | listingsPN | AvgMRI | TotalMRI | minPrice | AvgPrice | AvgOccupancyRate | index | most_common_room_type | ListingsDensity | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 26 | Banyule | MULTIPOLYGON (((145.13566 -37.74089, 145.13561... | 6.263126e+07 | 62.631260 | 52 | 411.837308 | 21415.54 | 23 | 84.25 | 0.167953 | 0 | Private room | 0.830256 |
| 13 | Knox | MULTIPOLYGON (((145.22919 -37.95208, 145.22908... | 1.138618e+08 | 113.861794 | 50 | 477.922800 | 23896.14 | 25 | 83.70 | 0.171716 | 13 | Private room | 0.439129 |
| 28 | Brimbank | MULTIPOLYGON (((144.88846 -37.71082, 144.88823... | 1.234358e+08 | 123.435831 | 28 | 435.386429 | 12190.82 | 25 | 71.75 | 0.183733 | 3 | Private room | 0.226839 |
Insights.6
To get a better geographic understanding of the Melbourne city airbnb listings
map = folium.Map(location=[-37.8, 145], zoom_start=10, tiles = 'Stamen Toner')
map
neighbourhood_stats.columns
Index(['neighbourhood', 'geometry', 'area', 'square_km', 'listingsPN',
'AvgMRI', 'TotalMRI', 'minPrice', 'AvgPrice', 'AvgOccupancyRate',
'index', 'most_common_room_type', 'ListingsDensity'],
dtype='object')
# Uniing folum and cloropleth maps to visualize the average monthly rental income in Melbourne city
folium.Choropleth(
geo_data=neighbourhood_stats,
data=neighbourhood_stats,
columns=['neighbourhood', 'AvgMRI'],
key_on='feature.properties.neighbourhood',
fill_color='Blues',
fill_opacity=0.9,
legend_name='Average monthly rental income for Airbnb hosts in Melbourne neighbourhoods'
).add_to(map)
map
# The average occupancy rate of each districts reveal that district in the north-east of Melbourne are highly in demand for tourism, like the yellow in map is Yarra Ranges
neighbourhood_stats.plot(column = 'AvgOccupancyRate', legend = True)
<AxesSubplot:>
# The listings are highest in the Melbourne district and relatively lesser in the north east - Yarra Ranges.
neighbourhood_stats.plot(column = 'listingsPN',cmap = 'OrRd', legend = True)
<AxesSubplot:>
# Most commpon room types on each neighbourhood
neighbourhood_stats.plot(column = 'most_common_room_type', legend = True )
<AxesSubplot:>
# These are the neighbourhoods that budget travellers will chose to stay as they have relatively cheap listings
neighbourhood_stats_budget = neighbourhood_stats.query('AvgPrice < 1000')
neighbourhood_stats_budget.plot(column = 'AvgPrice', legend = True, cmap = 'Blues', scheme = 'UserDefined', classification_kwds={'bins':[100, 150, 200]}, figsize = [10,15])
<AxesSubplot:>
# This map shows the lowest listing prices per neighbourhood. Again advantages for budget travellers
plot = neighbourhood_stats.plot(column = 'minPrice', legend = True, scheme = 'equalinterval', k = 5, figsize = [15, 15], alpha = 0.75)
cx.add_basemap(plot, crs = neighbourhood_stats.crs.to_string(), source = cx.providers.OpenStreetMap.CH, alpha = 0.73)
Insights.7